Abstract

Introduction

Methods

Results

Data Cleaning

library(tidyverse)
## ── Attaching packages ───────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.4.2     ✔ dplyr   0.7.4
## ✔ tidyr   0.8.0     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.2.0
## ── Conflicts ──────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(phyloseq)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
#Mothur data
load(file="mothur.RData")
set.seed(4832)
m.norm = rarefy_even_depth(mothur, sample.size=100000)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 614OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
m.perc = transform_sample_counts(m.norm, function(x) 100 * x/sum(x))

#Qiime2 data
load(file="qiime2.RData")
set.seed(4832)
q.norm = rarefy_even_depth(qiime2, sample.size=100000)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## 
## ...
## 6OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
q.perc = transform_sample_counts(q.norm, function(x) 100 * x/sum(x))

How does microbial community structure change with depth and oxygen concentration?

Alpha-diversity

#Mothur data
m.alpha = estimate_richness(m.norm, measures = c("Chao1", "Shannon"))
m.meta.alpha = full_join(rownames_to_column(m.alpha), rownames_to_column(data.frame(m.perc@sam_data)), by = "rowname")

#Mother data - Alpha diversity across depth
m.meta.alpha %>%
 
ggplot() +
  geom_point(aes(x=Depth_m, y=Shannon)) +
   geom_smooth(method='auto', aes(x=as.numeric(Depth_m), y=Shannon)) +
  labs(title="Figure 1: Alpha-diversity across depth using mothur data", y="Shannon's diversity index", x="Depth (m)")
## `geom_smooth()` using method = 'loess'

#Mothur data - Alpha diversity across oxygen
m.meta.alpha %>%
ggplot() +
  geom_point(aes(x=O2_uM, y=Shannon)) +
  labs(title="Figure 2: Alpha-diversity across oxygen using mothur data", y="Shannon's diversity index", x="Oxygen (uM)")

#Mothur data - Alpha diversity by oxic/anoxic
m.meta.alpha %>%
  mutate(O2_group = ifelse(O2_uM == 0, "anoxic", "oxic")) %>%
ggplot() +
  geom_boxplot(aes(x=O2_group, y=Shannon)) +
  labs(title="Figure 3: Alpha-diversity by oxic/anoxic using mothur data", y="Shannon's diversity index", x="Oxygen")

#Qiime2 data
q.alpha = estimate_richness(q.norm, measures = c("Chao1", "Shannon"))
q.meta.alpha = full_join(rownames_to_column(q.alpha), rownames_to_column(data.frame(q.perc@sam_data)), by = "rowname")

#Qiime 2 data - Alpha diversity across depth
q.meta.alpha %>%
 
ggplot() +
  geom_point(aes(x=Depth_m, y=Shannon)) +
   geom_smooth(method='auto', aes(x=as.numeric(Depth_m), y=Shannon)) +
  labs(title="Figure 4: Alpha-diversity across depth using qiime2 data", y="Shannon's diversity index", x="Depth (m)")
## `geom_smooth()` using method = 'loess'

#Qiime2 data - Alpha-diversity across oxygen
q.meta.alpha %>%
ggplot() +
  geom_point(aes(x=O2_uM, y=Shannon)) +
  labs(title="Figure 5: Alpha-diversity across oxygen using qiime2 data", y="Shannon's diversity index", x="Oxygen (uM)")

#Qiime2 data - Alpha-diversity by oxic/anoxic
q.meta.alpha %>%
  mutate(O2_group = ifelse(O2_uM == 0, "anoxic", "oxic")) %>%
ggplot() +
  geom_boxplot(aes(x=O2_group, y=Shannon)) +
  labs(title="Figure 6: Alpha-diversity by oxic/anoxic using qiime2 data", y="Shannon's diversity index", x="Oxygen")

Taxa presence and abundance

#Mothur data - Domain across samples
m.perc %>%
 
plot_bar(fill="Domain") +
  geom_bar(aes(fill=Domain), stat="identity") +
  labs(title="Figure 7: Domains across samples using mothur data")

#Mothur data - Phyla across samples
m.perc %>%
 
plot_bar() +
  geom_bar(aes(fill=Domain), stat="identity") +
  facet_wrap(~Phylum, scales="free_y")+
  labs(title="Figure 8: Phyla across samples using mothur data")

#Mothur data - Domain boxplots
m.perc %>%
  tax_glom(taxrank = 'Domain') %>%
  psmelt() %>%
ggplot() +
  geom_boxplot(aes(x=Domain, y=Abundance)) +
  coord_flip() +
  labs(title="Figure 9: Domain boxplots using mothur data")

#Qiime2 data - Domain across samples
q.perc %>%
 
plot_bar(fill="Domain") +
  geom_bar(aes(fill=Domain), stat="identity") +
  labs(title="Figure 10: Domains across samples using qiime2 data")

#Qiime2 data - Phyla across samples
q.perc %>%
 
plot_bar() +
  geom_bar(aes(fill=Domain), stat="identity") +
  facet_wrap(~Phylum, scales="free_y")+
  labs(title="Figure 11: Phyla across samples using qiime2 data")

#Qiime2 - Domain boxplots
q.perc %>%
  tax_glom(taxrank = 'Domain') %>%
  psmelt() %>%
ggplot() +
  geom_boxplot(aes(x=Domain, y=Abundance)) +
  coord_flip() +
  labs(title="Figure 12: Domain boxplots using qiime2 data")

Does your taxon of interest significantly differ in abundance with depth and/or oxygen concentration?

Mothur data

#Calculating significance with linear model for depth
m.norm %>% 
  subset_taxa(Phylum=="Cyanobacteria") %>% 
  tax_glom(taxrank = 'Phylum') %>%
  psmelt() %>%
  
  lm(Abundance ~ Depth_m, .) %>% 
  summary()
## 
## Call:
## lm(formula = Abundance ~ Depth_m, data = .)
## 
## Residuals:
##       1       2       6       4       5       3       7 
##  46.352 -50.700  20.041 -16.609  -3.284 -39.933  44.132 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 173.5309    39.3958   4.405  0.00699 **
## Depth_m      -1.0883     0.2864  -3.800  0.01263 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42.31 on 5 degrees of freedom
## Multiple R-squared:  0.7428, Adjusted R-squared:  0.6914 
## F-statistic: 14.44 on 1 and 5 DF,  p-value: 0.01263
#Plot to go along with linear model above
m.perc %>% 
  subset_taxa(Phylum=="Cyanobacteria") %>% 
  psmelt() %>% 
  group_by(Sample) %>% 
  summarize(Abundance_sum=sum(Abundance), Depth_m=mean(Depth_m)) %>% 
  
  ggplot() +
  geom_point(aes(x=Depth_m, y=Abundance_sum)) +
  geom_smooth(method='lm', aes(x=as.numeric(Depth_m), y=Abundance_sum)) +
  labs(title="Figure 13: Abundance of Cyanobacteria across depth")

#Calculating significance with linear model for oxygen
m.norm %>% 
  subset_taxa(Phylum=="Cyanobacteria") %>% 
  tax_glom(taxrank = 'Phylum') %>% 
  psmelt() %>% 
  
  lm(Abundance ~ O2_uM, .) %>% 
  summary()
## 
## Call:
## lm(formula = Abundance ~ O2_uM, data = .)
## 
## Residuals:
##       1       2       6       4       5       3       7 
##   6.768 -17.048  19.375  -4.217  12.375 -22.627   5.375 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.3745     7.4690   -0.72 0.504007    
## O2_uM         0.9582     0.0885   10.83 0.000117 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.87 on 5 degrees of freedom
## Multiple R-squared:  0.9591, Adjusted R-squared:  0.9509 
## F-statistic: 117.2 on 1 and 5 DF,  p-value: 0.0001167
#Plot to go along with linear model above
m.perc %>% 
  subset_taxa(Phylum=="Cyanobacteria") %>% 
  psmelt() %>% 
  group_by(Sample) %>% 
  summarize(Abundance_sum=sum(Abundance), O2_uM=mean(O2_uM)) %>% 
  
  ggplot() +
  geom_point(aes(x=O2_uM, y=Abundance_sum)) +
  geom_smooth(method='lm', aes(x=as.numeric(O2_uM), y=Abundance_sum)) +
  labs(title="Figure 14: Abundance of Cyanobacteria across oxygen concentrations")

Qiime2 data

#Calculating significance with linear model for depth
library(dplyr)
library(ggplot2)
# P-value
q.norm %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  tax_glom(taxrank = 'Phylum') %>%
  psmelt() %>%
  
  lm(Abundance ~ Depth_m, .) %>%
  summary()
## 
## Call:
## lm(formula = Abundance ~ Depth_m, data = .)
## 
## Residuals:
##        1        4        5        3        2        6        7 
##   63.270  160.404   72.980  -30.172 -221.273  -44.444   -0.766 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 687.7807   122.7658   5.602   0.0025 **
## Depth_m      -3.3051     0.8925  -3.703   0.0140 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 131.8 on 5 degrees of freedom
## Multiple R-squared:  0.7328, Adjusted R-squared:  0.6794 
## F-statistic: 13.71 on 1 and 5 DF,  p-value: 0.01395
#Plot to go along with linear model above
q.perc %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  psmelt() %>%
  group_by(Sample) %>%
  summarize(Abundance_sum=sum(Abundance), Depth_m=mean(Depth_m)) %>%
 
ggplot() +
  geom_point(aes(x=Depth_m, y=Abundance_sum)) +
  geom_smooth(method='lm', aes(x=as.numeric(Depth_m), y=Abundance_sum)) +
  labs(title="Figure 15: Abundance unclassified domain across depth using qiime2 data")

#Calculating significance with linear model for oxygen
q.norm %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  tax_glom(taxrank = 'Phylum') %>%
  psmelt() %>%
  lm(Abundance ~ O2_uM, .) %>%
  summary()
## 
## Call:
## lm(formula = Abundance ~ O2_uM, data = .)
## 
## Residuals:
##         1         4         5         3         2         6         7 
##    0.5171  190.2269  105.9213   18.5371 -121.0450  -61.0787 -133.0787 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 159.0787    57.3201   2.775   0.0391 *
## O2_uM         2.5772     0.6792   3.794   0.0127 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 129.5 on 5 degrees of freedom
## Multiple R-squared:  0.7422, Adjusted R-squared:  0.6907 
## F-statistic:  14.4 on 1 and 5 DF,  p-value: 0.0127
#Plot to go along with linear model above
q.perc %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  psmelt() %>%
  group_by(Sample) %>%
  summarize(Abundance_sum=sum(Abundance), O2_uM=mean(O2_uM)) %>%
 
ggplot() +
  geom_point(aes(x=O2_uM, y=Abundance_sum)) +
  geom_smooth(method='lm', aes(x=as.numeric(O2_uM), y=Abundance_sum)) +
  labs(title="Figure 16: Abundance unclassified domain across oxygen using qiime2 data")

Within your taxon, what is the richness (number of OTUs/ASVs)?

Table showing richness of OTUs using Mothur data

set.seed(4832)
m.norm = rarefy_even_depth(mothur, sample.size=100000)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 614OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
q.norm = rarefy_even_depth(qiime2, sample.size=100000)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## 
## ...
## 3OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
m.norm %>% 
  subset_taxa(Phylum=="Cyanobacteria") %>%
  estimate_richness(measures = c("Observed"))
##             Observed
## Saanich_010        5
## Saanich_100        4
## Saanich_120        1
## Saanich_135        4
## Saanich_150        3
## Saanich_165        3
## Saanich_200        0

Table showing richness of ASVs using Qiime2 data

q.norm %>% 
  subset_taxa(Phylum=="Cyanobacteria") %>%
  estimate_richness(measures = c("Observed"))
## Error in dimnames(x) <- dn: length of 'dimnames' [1] not equal to array extent

Do the abundances of OTUs/ASVs within your taxon of interest change significantly with depth and/or oxygen concentration?

Mothur data

#Calculating abundances of OTUs across depth
m.perc %>%
  subset_taxa(Phylum=="Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=Depth_m, y=Abundance)) +
  geom_smooth(method='lm', aes(x=Depth_m, y=Abundance)) +
  facet_wrap(~OTU, scales="free_y") +
  labs(title="Figure 17: Abundance of OTUs within cyanobacteria phylum across depth")

m.perc %>%
  subset_taxa(Phylum=="Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=Depth_m, y=OTU, size=Abundance, color=OTU)) +
  scale_size_continuous(range = c(1,6)) +
  labs(title="Figure 18: Abundance of OTUs within cyanobacteria phylum across depth")

#Calculating abundances of OTUs across oxygen
m.perc %>%
  subset_taxa(Phylum=="Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=O2_uM, y=Abundance)) +
  geom_smooth(method='lm', aes(x=O2_uM, y=Abundance)) +
  facet_wrap(~OTU, scales="free_y") +
  labs(title="Figure 19: Abundance of OTUs within cyanobacteria phylum across oxygen")

m.perc %>%
  subset_taxa(Phylum=="Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=O2_uM, y=OTU, size=Abundance, color=OTU)) +
  scale_size_continuous(range = c(1,6)) +
  labs(title="Figure 20: Abundance of OTUs within cyanobacteria phylum across oxygen")

Qiime2 data

#Calculating abundaces of ASVs across depth
q.perc %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=Depth_m, y=Abundance)) +
  geom_smooth(method='lm', aes(x=Depth_m, y=Abundance)) +
  facet_wrap(~OTU, scales="free_y") +
  labs(title="Figure 21: Abundance of ASVs within cyanobacteria phylum across depth")

q.perc %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=Depth_m, y=OTU, size=Abundance, color=OTU)) +
  scale_size_continuous(range = c(1,6)) +
  labs(title="Figure 22: Abundance of ASVs within cyanobacteria phylum across depth")

#Calculating abundance of ASVs across oxygen
q.perc %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=O2_uM, y=Abundance)) +
  geom_smooth(method='lm', aes(x=O2_uM, y=Abundance)) +
  facet_wrap(~OTU, scales="free_y") +
  labs(title="Figure 23: Abundance of ASVs within cyanobacteria phylum across oxygen")

q.perc %>%
  subset_taxa(Phylum=="D_1__Cyanobacteria") %>%
  psmelt() %>%
 
ggplot() +
  geom_point(aes(x=O2_uM, y=OTU, size=Abundance, color=OTU)) +
  scale_size_continuous(range = c(0,5)) +
  labs(title="Figure 24: Abundance of ASVs within cyanobacteria phylum across oxygen")

Discussion

References